####################################################################
## author:    Michael L. Wicki
## contact:   michael.wicki@istp.ethz.ch, ETH Zurich
## file name: mobility_conjoint_predictions.R
## Context:   Predictions
## started:   2019-01-28
####################################################################

rm(list = ls())
getwd()
setwd("\\\\gess-fs.d.ethz.ch\\home$\\wimi\\Documents\\Sufficiency\\Conjoint")
set.seed(42)
#.libPaths( "\\\\gess-fs.d.ethz.ch\\home$\\wimi\\Documents\\R\\win-library\\3.3" )

################################ load packages ################################ 

#install.packages("Matrix")
#install.packages("ggthemes")
#install.packages("cjoint")
#install.packages("ggplot2")
#install.packages("effects")
#install.packages("lme4")
#install.packages("data.table")
#install.packages("dplyr")
#install.packages("nFactors")
#install.packages("texreg")
#install.packages("tidyr")
#install.packages("forcats")
#install.packages("Rmisc")
#install.packages("cowplot")
#install.packages("scales")
#install.packages("cregg")


library(Matrix)
library(ggthemes)
library(cjoint)
library(ggplot2)
library(effects)
library(lme4)
library(data.table)
library(dplyr)
library(nFactors)
library(texreg)
library(tidyr)
library(forcats)
library(Rmisc)
library(cowplot)
library(scales)
library(cregg)
library(reshape2)


################################ load dataset & design ################################ 

load("conjoint_design.RData")

#estimate all theoretically possible package combinations by countries

hyp_df = expand.grid(attrib1_lab = unique(df_conj$attrib1_lab), attrib2_lab = unique(df_conj$attrib2_lab),
                     attrib3_lab = unique(df_conj$attrib3_lab), attrib4_lab = unique(df_conj$attrib4_lab), 
                     attrib5_lab = unique(df_conj$attrib5_lab), attrib6_lab = unique(df_conj$attrib6_lab), 
                     attrib7_lab = unique(df_conj$attrib7_lab), country = unique(df_conj$country)
                     )

#----------
out_m = lm(rate  ~ (attrib1_lab + attrib2_lab + attrib3_lab + attrib4_lab+attrib5_lab+attrib6_lab+attrib7_lab) * country, data = df_conj)

hyp_df$pred_out = predict(out_m, newdata = hyp_df)

fig_rate_average <- ggplot(hyp_df, aes(x=pred_out, colour=country)) + 
  geom_density(size=1.2) + 
  theme_bw() +
  theme(panel.grid.major.x = element_line(size=.5, color="gray80"),
        panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.grid.major.y = element_line(size=.5, color="gray80" )) +
  xlab("Distribution of average ratings for policy-packages") +
  ylab("Density") +
  scale_colour_manual(values = c("US" = "#b2abd2", "DE" = "#5e3c99", "CN" = "#e66101")) +
  scale_x_continuous(breaks = c(1,2,3,4,5,6,7), labels = c("1","2", "3", "4", "5", "6", "7")) +
  #coord_cartesian(ylim = c(.0257, 0.55), xlim = c(1.27, 6.73)) +
  #geom_vline(xintercept = 4.740764, color = "#e66101", size=1.2, linetype = "longdash") +
  #geom_vline(xintercept = 3.651830, color = "#5e3c99", size=1.2, linetype = "longdash") +
  #geom_vline(xintercept = 4.069573, color = "#b2abd2", size=1.2, linetype = "longdash") +
  guides(colour=FALSE) +
  geom_vline(xintercept = 4, lty="dashed") +
  geom_vline(xintercept = 5, lty="dashed")

dat.m <- melt(hyp_df,id.vars=c("pred_out", "country"), measure.vars=c("attrib1_lab", "attrib2_lab", "attrib3_lab", "attrib4_lab", "attrib5_lab", "attrib6_lab", "attrib7_lab"))

nrow(hyp_df[hyp_df$pred_out >= 4 & hyp_df$country == "CN",])
nrow(hyp_df[hyp_df$pred_out >= 4 & hyp_df$country == "DE",])
nrow(hyp_df[hyp_df$pred_out >= 4 & hyp_df$country == "US",])
nrow(hyp_df[hyp_df$pred_out >= 5 & hyp_df$country == "CN",])
nrow(hyp_df[hyp_df$pred_out >= 5 & hyp_df$country == "DE",])
nrow(hyp_df[hyp_df$pred_out >= 5 & hyp_df$country == "US",])


dat.m$att <- factor(dat.m$value, levels = c(rev(c("3) No new tax", "2) Increasing prices by 15%", "1) Increasing prices by 30%",
                                                        "2) General government budget", "1) Public environmental and climate protection programs", "3) Public programs for low-income households", "4) Reduce income taxes", "5) No tax revenues",
                                                        "3) Keeping subsidies at current level", "2) Halving subsidies ", "1) Eliminating subsidies",
                                                        "5) Never restricted", "4) 1 day per week", "3) 3 days per week", "2) 5 days per week", "1) Always restricted", 
                                                        "3) No new requirements", "2) At least 40 miles per gallon ", "1) At least 55 miles per gallon ",
                                                        "3) No increase of support", "2) Reducing prices by 15%", "1) Reducing prices by 30%",
                                                        "3) No campaigns", "2) Occasional campaigns", "1) Frequent campaigns"))),
                        labels = c(rev(c("No new tax", "Increasing prices by 15%", "Increasing prices by 30%", 
                                         "General government budget", "Public environmental and climate protection programs", "Public programs for low-income households", "Reduce income taxes", "No tax revenues",
                                         "Keeping subsidies at current level", "Halving subsidies", "Eliminating subsidies",
                                         "Never restricted", "1 day per week", "3 days per week", "5 days per week", "Always restricted", 
                                         "No new requirements", "Maximum consumption of 5.9 litres per 100 kilometres", "Maximum consumption of 4.3 litres per 100 kilometres",
                                         "No increase of support", "Reducing prices by 15%", "Reducing prices by 30%",
                                         "No campaigns", "Occasional campaigns", "Frequent campaigns"))))

dat.m$policy <- ifelse(dat.m$att == "No new tax" |  dat.m$att == "Increasing prices by 15%" |  dat.m$att == "Increasing prices by 30%", "1", ifelse(
  dat.m$att == "General government budget" | dat.m$att == "Public environmental and climate protection programs" | dat.m$att == "Public programs for low-income households" | dat.m$att == "Reduce income taxes" | dat.m$att == "No tax revenues", "2", ifelse(
    dat.m$att == "Keeping subsidies at current level" | dat.m$att == "Halving subsidies" | dat.m$att == "Eliminating subsidies", "3", ifelse(
      dat.m$att == "Never restricted" | dat.m$att == "1 day per week" | dat.m$att == "3 days per week" | dat.m$att == "5 days per week" | dat.m$att == "Always restricted", "4", ifelse(
        dat.m$att == "No new requirements" | dat.m$att == "Maximum consumption of 5.9 litres per 100 kilometres" | dat.m$att == "Maximum consumption of 4.3 litres per 100 kilometres", "5", ifelse(
          dat.m$att == "No campaigns" | dat.m$att == "Occasional campaigns" | dat.m$att == "Frequent campaigns", "6", ifelse(
            dat.m$att == "No increase of support" | dat.m$att == "Reducing prices by 15%" | dat.m$att == "Reducing prices by 30%", "7",
            "8")))))))

dat.m$policy = factor(dat.m$policy, levels=c(1:8), labels=c("Tax","Earmarking","Industry Subsidy", "Access restriction", "Standards", "Campaign", "PT prices", "Other"))
dat.m$country = factor(dat.m$country, levels=c("CN", "DE", "US"), labels=c("China","Germany","USA"))


pred_bpl <- ggplot(dat.m, aes(x=att, y=pred_out)) + 
  geom_boxplot(width=.4, outlier.size = 1) +
  coord_flip(ylim = c(3, 5.5)) +
  theme_bw() +
  theme(panel.grid.major.x = element_line(size=.5, color="gray80"),
        panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.grid.major.y = element_line(size=.5, color="gray80" )) +
  ylab("Distribution of predicted rating outcomes for policy-packages") +
  xlab("") +
  scale_y_continuous(breaks = c(1,2,3,4,5,6,7), labels = c("1","2", "3", "4", "5", "6", "7")) +
  facet_grid(policy~country, scales = "free") +
  geom_hline(yintercept = 4, lty="dashed") +
  geom_hline(yintercept = 5, lty="dashed")

ggsave(filename = "./figs/boxplot_prediction.png", pred_bpl, width = 10, height = 8.2)

dodge <- position_dodge(width = 1)
pred_viol <- ggplot(dat.m, aes(x=att, y=pred_out)) + 
  geom_violin(fill = "gray80", position = dodge) +
  geom_boxplot(width=.4, position = dodge, outlier.size = 1) +
    coord_flip(ylim = c(3, 5.5)) +
  theme_bw() +
  theme(panel.grid.major.x = element_line(size=.5, color="gray80"),
        panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.grid.major.y = element_line(size=.5, color="gray80" )) +
  ylab("Distribution of predicted rating outcomes for policy-packages") +
  xlab("") +
  facet_grid(policy~country, scales = "free") +
  scale_y_continuous(breaks = c(1,2,3,4,5,6,7), labels = c("1","2", "3", "4", "5", "6", "7")) +
  geom_hline(yintercept = 4, lty="dashed") +
  geom_hline(yintercept = 5, lty="dashed") 
pred_viol

ggsave(filename = "./figs/violin_prediction.png", pred_viol, width = 10, height = 8.2)

######Policy Packages Germany
hyp_df[hyp_df$pred_out >= 4 & hyp_df$country == "DE" & hyp_df$attrib1_lab == "2) Increasing prices by 15%" & hyp_df$attrib3_lab == "3) 3 days per week",]
#2) Increasing prices by 15% 4) Reduce income taxes 3) 3 days per week 1) Reducing prices by 30% 2) At least 40 miles per gallon  1) Frequent campaigns 1) Eliminating subsidies OR 2) Halving subsidies

hyp_df[hyp_df$pred_out >= 4 & hyp_df$country == "DE" & hyp_df$attrib1_lab == "2) Increasing prices by 15%" & hyp_df$attrib4_lab == "2) Reducing prices by 15%",]
#2) Increasing prices by 15% 4) Reduce income taxes 5) Never restricted 2) Reducing prices by 15% 2) At least 40 miles per gallon    1) Frequent campaigns OR 2) Occasional campaigns 1) Eliminating subsidies OR 2) Halving subsidies

######Policy Packages USA
hyp_df[hyp_df$pred_out >= 4 & hyp_df$country == "US" & hyp_df$attrib1_lab == "1) Increasing prices by 30%" & hyp_df$attrib3_lab == "2) 5 days per week",]
#1) Increasing prices by 30%  NOT GENERAL BUDGET 2) 5 days per week 1) Reducing prices by 30% 2) At least 40 miles per gallon  2) Occasional campaigns  OR 1) Frequent campaigns  2) Halving subsidies OR 1) Eliminating subsidies

######Policy Packages China
hyp_df[hyp_df$pred_out >= 5 & hyp_df$country == "CN" & hyp_df$attrib1_lab == "2) Increasing prices by 15%" & hyp_df$attrib3_lab == "3) 3 days per week" & hyp_df$attrib4_lab == "2) Reducing prices by 15%",]
#2) Increasing prices by 15% 4) Reduce income taxes 3) 3 days per week 2) Reducing prices by 15% 2) At least 40 miles per gallon  1) Frequent campaigns 1) Eliminating subsidies OR 2) Halving subsidies

